# Remove every non-hidden object from the current environment so the script
# starts from an empty workspace.
rm(list = ls())

## ---------------------------------------
## Load Packages 
## ---------------------------------------

library('AER')
library('ivpack')
library('data.table')

## ---------------------------------------
## Load Data and Functions
## ---------------------------------------
## Restore the objects stored in the two .RData files. The rest of this script
## reads `data0812` and `missing`; presumably both come from these files --
## TODO confirm.
load("../data0812.RData")
load("../data0812missing.RData")

## Source the helper scripts. Only files ending in .R / .r are picked up, so a
## sub-directory or a non-R file inside the folder cannot abort the run.
directory <- "../functions/"
functions <- list.files(directory, pattern = "\\.[Rr]$")
## lapply() (rather than sapply()) because the calls are made for their side
## effect only; the return value is kept under its historical name.
loadfunctions <- lapply(functions, FUN = function(x) source(paste0(directory, x)))

## ---------------------------------------
# Construct Instrument
## ---------------------------------------
## constructIV() is expected to be defined by the sourced helpers; presumably
## it adds the `judgeiv` column used further down -- TODO confirm.
data0812 <- constructIV(data0812)

## ---------------------------------------
#  Select Last Case Before Election
## ---------------------------------------
## lastCase() is expected to be defined by the sourced helpers. Its result is
## kept separately because later statements use both the full and the reduced
## data set.
data0812final <- lastCase(data0812)

## ---------------------------------------
#  Main paper, intext calculations 
## ---------------------------------------
# Overall match rate: sum of prob_m (presumably a per-record match
# probability -- confirm) divided by the number of rows in the final sample.
match.rate <- sum(data0812final$prob_m) / nrow(data0812final)
print(paste0("Altogether, the match rate is ", round(match.rate, 2), "."))
## ---------------------------------------
# Number of rows recorded under each uniqueid; reused further down in the
# appendix calculations.
cases.per <- data0812[, list(count = .N), by = "uniqueid"]
# Percentage of uniqueids that appear more than once.
pct.multiple <- round(100 * mean(cases.per$count > 1))
print(paste0("For defendants with multiple cases in the time period (", pct.multiple, "%)..."))
## ---------------------------------------
# Share of final-sample rows with a non-missing quants_inc, rounded to two
# decimals before being scaled to a percentage.
share.valid.zip <- round(mean(!is.na(data0812final$quants_inc)), 2)
print(paste0("Finally, for those defendants with valid zip codes in Pennsylvania (", share.valid.zip * 100, "%)..."))
## ---------------------------------------
# Rows flagged pti == 1: median days_jail expressed in months (days / 30) and
# median firstBailAmt.
detained <- data0812final$pti == 1
# No na.rm here, as in the original: a missing days_jail makes the median NA.
median.jail.months <- round(median(data0812final$days_jail[detained]) / 30, 1)
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
median.bail <- round(median(data0812final$firstBailAmt[detained], na.rm = TRUE))
print(paste0("Detainees’ pretrial jail time...a median of about ", median.jail.months,
      " months. Their median bail was ", median.bail, "..."))
## ---------------------------------------
# Combined key made by pasting court_time1 and totOGS2 together.
data0812$court_time1_OGS <- with(data0812, paste0(court_time1, totOGS2))
# Cross-tabulate magistrate (judge_cat) against the time key, with and without
# the severity component. NOTE(review): the medians below run over every cell
# of the cross-tabulation, including combinations with zero cases.
cases.by.judge.year <- table(data0812$judge_cat, data0812$court_time1)
cases.by.judge.year.ogs <- table(data0812$judge_cat, data0812$court_time1_OGS)
print(paste0("The median number of cases per magistrate-by-year is ", median(cases.by.judge.year),
      " and the median number of cases per magistrate-by-year-by-OGS-tercile is ", median(cases.by.judge.year.ogs), "."))

## ---------------------------------------
# Range, mean and standard deviation of the instrument column judgeiv over all
# rows of data0812, each rounded to two decimals.
iv.all <- data0812$judgeiv
print(paste0("The leave-out-case pretrial detention rate ranges from ", round(min(iv.all), 2), " to ", round(max(iv.all), 2),
             " with an average of ", round(mean(iv.all), 2), " and a standard deviation of ", round(sd(iv.all), 2), "."))

## ---------------------------------------
# Spread (max - min) of judgeiv among rows with the given totOGS2 value,
# expressed in whole percentage points.
iv.spread.pct <- function(tercile) {
  iv.sub <- data0812$judgeiv[data0812$totOGS2 == tercile]
  round((max(iv.sub) - min(iv.sub)) * 100)
}
print(paste0("Moving from the most to the least lenient magistrate increases the likelihood of pretrial detention by ",
             iv.spread.pct(1),
             " percentage points for defendants in the lowest tercile of offense severity, almost ",
             iv.spread.pct(2),
             " points for those in the middle tercile, and ", iv.spread.pct(3),
             " points for those with the most serious offenses."))

## ---------------------------------------
## Appendix
## ---------------------------------------
# Paper text reproduced here: "The minimum number of cases per year and case
# severity tercile is 5,725 and the median is 8,932".
# court_time1rev maps the court_time1 values "3" and "5" to 3 and keeps every
# other value (presumably "3"/"5" are the codes for 2008 and 2009 -- confirm).
fold.rows <- data0812$court_time1 %in% c("3", "5")
data0812$court_time1rev <- ifelse(fold.rows, 3, data0812$court_time1)
# Case counts per time period and severity tercile, before and after folding.
cells.year.ogs <- table(data0812$court_time1, data0812$totOGS2)
cells.yearrev.ogs <- table(data0812$court_time1rev, data0812$totOGS2)
print(paste0("The minimum number of cases per year and case severity tercile is ", min(cells.year.ogs),
             " and the median is ", median(cells.year.ogs),
             " (or ", min(cells.yearrev.ogs), " and ", median(cells.yearrev.ogs),
             ", respectively, when we fold 2008 data into 2009 (since our sample only includes the last two months of 2008)."))

## ---------------------------------------
# Paper text reproduced here: "Most (67%) defendants in our sample have one
# case between 2008 and 2012, and 19% have two."
# Percentage of uniqueids in cases.per (built earlier in this script) with
# exactly k rows.
pct.with.cases <- function(k) {
  round(mean(cases.per$count == k) * 100)
}
print(paste0("Most (", pct.with.cases(1), "%) defendants in our sample have one case between 2008 and 2012, and ", pct.with.cases(2), "% have two."))

## ---------------------------------------
# Keep rows with weight >= 2 (presumably weight is the defendant's number of
# cases -- confirm).
data0812.mult <- data0812[weight >= 2, ]
# Per uniqueid: difference between the latest bail date and the second latest.
# The value is written to every row of the group.
data0812.mult[, dif_date := {
  case.dates <- as.Date(bail_date)
  max(case.dates) - sort(case.dates, decreasing = TRUE)[2]
}, by = "uniqueid"]
# The mean is taken over rows, so each uniqueid contributes once per row.
mean.gap.days <- round(as.numeric(mean(data0812.mult$dif_date)))
print(paste0("Among such individuals, their two most recent cases are far apart (", mean.gap.days, " days on average)"))


## ---------------------------------------
# Percentage of final-sample rows with age_2012 below 35 and above 65.
ages.2012 <- data0812final$age_2012
pct.under.35 <- round(mean(ages.2012 < 35) * 100)
pct.over.65 <- round(mean(ages.2012 > 65) * 100)
print(paste0("First, the age distribution of our sample skews young: over half (", pct.under.35,
             "%) of defendants in our sample are under the age of 35 in 2012, and only ", pct.over.65, "% was over 65."))
## ---------------------------------------
# Rounded sum of prob_m among final-sample rows whose race equals race.label.
matched.by.race <- function(race.label) {
  round(sum(data0812final$prob_m[data0812final$race == race.label]))
}
# Sample size, total and by-race match counts, and the number of rows with
# merge_type equal to 2.
print(paste0("In total, out of our final sample of ", nrow(data0812final), " we matched ", round(sum(data0812final$prob_m)),
             " records (",
             matched.by.race("Black"),
             " Blacks and ",
             matched.by.race("White"), " Whites), of which ",
             round(sum(data0812final$merge_type == 2)),
             " matches came from the 2014 uniform nationwide voter files and the remainder from the merge with the 2013 Pennsylvania file. "))
## ---------------------------------------
# Rounded sum of prob_m_09 among final-sample rows whose race equals
# race.label.
matched09.by.race <- function(race.label) {
  round(sum(data0812final$prob_m_09[data0812final$race == race.label]))
}
# Rows with noteli08 == 0 form the denominator quoted in the sentence.
n.eligible.08 <- sum(data0812final$noteli08 == 0)
print(paste0("In the merge with the 2009 voter file to recover 2008 turnout, we find that out of the ", n.eligible.08,
             " defendants that were old enough to vote in 2008,  we could match ", round(sum(data0812final$prob_m_09)), " (",
             matched09.by.race("Black"), " Blacks and ", matched09.by.race("White"), " Whites) records with the 2009 voter file"))

## ---------------------------------------
# Match rate in whole percent for one race group: the sum of the supplied
# probability column over that group's rows divided by the group's row count.
race.match.pct <- function(prob, race.value) {
  n.group <- nrow(data0812final[data0812final$race == race.value, ])
  round(sum(prob[data0812final$race == race.value]) / n.group * 100)
}
print(paste0("While our overall match rates are somewhat larger for Blacks (",
             race.match.pct(data0812final$prob_m_09, "Black"),
             "% in 2009 and ", race.match.pct(data0812final$prob_m, "Black"),
             "% in 2013) than Whites (",
             race.match.pct(data0812final$prob_m_09, "White"),
             "% in 2009 and ", race.match.pct(data0812final$prob_m, "White"),
             "% in 2013), the change from 2009 to 2013 in matching rates within race appears to have remained almost constant. "))
      

## ---------------------------------------
# Number of distinct judge_cat values among rows whose bail_date lies in the
# closed interval [from, to].
count.magistrates <- function(from, to) {
  in.window <- data0812$bail_date >= from & data0812$bail_date <= to
  length(unique(data0812$judge_cat[in.window]))
}
# The first window ends on 2009-03-31 to match its "November 2008-March 2009"
# label; it previously ran to 2009-12-31 and therefore covered all three
# windows at once.
print(paste0("We observe ", count.magistrates(min(data0812$bail_date), "2009-03-31"),
 " magistrates between November 2008-March 2009, ", count.magistrates("2009-04-01", "2009-07-31"),
 " magistrates between April 2009-July 2009 and ", count.magistrates("2009-08-01", "2009-12-31"),
 " magistrates between August 2009-December 2009."))

## ---------------------------------------
# `missing` is a data object (presumably restored from data0812missing.RData
# -- confirm) holding the cases without a named bail magistrate.
# The cut-off is inclusive (<=) so that cases dated 2009-12-31 count as
# 2008-2009, consistent with the other end-of-2009 comparisons in this script.
pct.missing.early <- round(mean(missing$bail_date <= "2009-12-31") * 100)
print(paste0("In fact, most (", pct.missing.early, "%) of the ", dim(missing)[1],
" cases without a named bail magistrate in the data from 2008-2012 occur in between 2008-2009"))

## ---------------------------------------
# Values of `pattern` that count as a deterministic (exact) match.
patterns.exact <- c("2-2-2-2", "2-2-2-NA")

# Both rates use the final sample as denominator. The baseline was previously
# computed on data0812 (all cases), which made the reported drop a comparison
# across two different samples.
n.final <- nrow(data0812final)
pct.match.all <- round(sum(data0812final$prob_m) / n.final * 100)
pct.match.exact <- round(sum(data0812final$prob_m[data0812final$pattern %in% patterns.exact]) / n.final * 100)

# The drop is the difference of the two already-rounded percentages.
print(paste0("When we instead use the deterministic approach... our match rate drops by only ",
             pct.match.all - pct.match.exact,
     " points (to ", pct.match.exact, "%)"))


## ---------------------------------------
# Clear the workspace again: every non-hidden object created above is
# discarded, and the statements that follow reload their own inputs.
rm(list = ls())

## ---------------------------------------
## Load Packages 
## ---------------------------------------
# library() stops with an error when a package is missing; require() would
# only warn and return FALSE, letting the script fail later at first use.
library('AER')
library('ivpack')
library('data.table')

## ---------------------------------------
## Load Data and Functions
## ---------------------------------------
## Restore the objects stored in the .RData file; the statements below read
## `data0812`, presumably supplied by this file -- TODO confirm.
load("../data0812.RData")

## Source the helper scripts. Only files ending in .R / .r are picked up, so a
## sub-directory or a non-R file inside the folder cannot abort the run.
directory <- "../functions/"
functions <- list.files(directory, pattern = "\\.[Rr]$")
## lapply() (rather than sapply()) because the calls are made for their side
## effect only; the return value is kept under its historical name.
loadfunctions <- lapply(functions, FUN = function(x) source(paste0(directory, x)))

## ---------------------------------------
# Construct Instrument
## ---------------------------------------
## constructIV() is expected to be defined by the sourced helpers; presumably
## it adds the `judgeiv` column used as the instrument -- TODO confirm.
data0812 <- constructIV(data0812)

## ---------------------------------------
#  Select Last Case Before Election
## ---------------------------------------
## Unlike the first script, the reduced data set replaces data0812 itself.
data0812 <- lastCase(data0812)


## ---------------------------------------
## Row 1. Main Specifications
## ---------------------------------------

# Right-hand-side building blocks, kept as strings so they can be pasted into
# both parts of the instrumental-variable formulas.
time.controls <- "as.factor(court_time1) + as.factor(court_time2) + as.factor(court_dow) + as.factor(court_shift) + as.factor(totOGS2)"
case.controls <-   "as.factor(any_drug_2) +  as.factor(any_violent_2) + as.factor(fire_arms_2) +  as.factor(any_rob_2) + as.factor(any_dui_2) + as.factor(prior_offender_2)"
demo.controls <- "age_2012 + I(age_2012^2) + Female + as.factor(race) + vote2008 + as.factor(noteli08) + regis_before"

# Outcome, endogenous regressor and instrument.
outc.1 <- "vote2012"
endo.1 <- "pti"
inst.1 <- "judgeiv"

# Text of "outcome ~ endogenous + controls | instrument + controls"; the same
# controls appear on both sides of the bar.
iv.formula.text <- function(controls) {
  paste(outc.1, "~", endo.1, "+", controls, "|", inst.1, "+", controls)
}

# Three nested specifications: time controls only, plus demographics, plus
# case characteristics. formula() is called here at top level, not inside the
# helper, so the formulas keep the same environment as before.
form.1 <- formula(iv.formula.text(time.controls))
form.2 <- formula(iv.formula.text(paste(time.controls, "+", demo.controls)))
form.3 <- formula(iv.formula.text(paste(time.controls, "+", demo.controls, "+", case.controls)))

# Bivariate-probit point estimate and standard error, entered by hand
# (presumably copied from a separately estimated model -- confirm source).
biprobit.est <- -0.100
biprobit.se <- 0.010

# IV fit of the full specification. The robust coefficient table is computed
# once and reused; row 2 is the first regressor after the intercept, i.e. the
# endogenous variable in form.3.
m1a3 <- ivreg(form.3, data = data0812)
iv.coef <- robust.se(m1a3)
iv.est <- c(iv.coef[2, "Estimate"])
iv.se <- c(iv.coef[2, "Std. Error"])

# Difference between the two estimates and its standard error, treating the
# two estimates as independent.
diff <- iv.est - biprobit.est
se <- sqrt(iv.se^2 + biprobit.se^2)

# Welch-Satterthwaite degrees of freedom:
#   (se1^2 + se2^2)^2 / (se1^4 / (n1 - 1) + se2^4 / (n2 - 1)).
# The earlier version squared the sum of the standard errors in the numerator
# and used se^2 instead of se^4 in the denominator.
# NOTE(review): the IV sample size is used for both estimates, which assumes
# the biprobit model was fitted on the same number of observations -- confirm.
df.1 <- (iv.se^2 + biprobit.se^2)^2
df.2 <- (1 / (m1a3$n - 1)) * iv.se^4 + (1 / (m1a3$n - 1)) * biprobit.se^4
df <- df.1 / df.2

# Two-sided p-value from the t distribution.
# NOTE(review): one further degree of freedom is subtracted, as in the
# original; this is not part of the Welch approximation -- confirm intent.
cat(paste0("The biprobit estimates are statistically indistinguishable from their counterparts in Table J1; the t-statistic is ",
round(c(diff/se), 2), ", p = ", round(2 * c(pt(-abs(diff/se), df = df - 1)), 2), " (all defendants)."))
